1 Introduction

This is a Notebook for visualizing dosage plot on Arabidopsis sample Gaetan. The input data is originally from bin-by-sam analysis. Details are as follow: 1. Bin-by-sam.py (credit to Meric). 3 samples input into this python script: the chromoanagenesis sample, two controls from mixture of low coverage WT Arabidopsis (in order to have a similar sequence depth with the chromoanagenesis one). We tried different bin sizes, from 2.5kb to 100kb (2.5, 5, 10, 100). 2. Plot out relative reads coverage in JMP. By using JMP, we plot out the relative reads coverage for three individuals, and confirm CNVs at the beginning part of chromoanagenesis sample. 3. Modify the .txt file for visualization by ggplot. To have a clear plot for the review paper, we decide to remake the dosage plot using ggplot. Since control1 is using as the baseline in bin-by-sam, so the relative read coverage is all 2, so we delete that samples’ information in the input file and only keep the chromoanagenesis sample and control2.

2 Outlines

  1. load libraries and data

3 Input libraries and data

3.1 add libraries

library("tidyverse")
library("ggplot2")
library("ggpubr")

3.2 add data

arabidopsis.5k <- read.csv("/Users/wendy/Desktop/Lab/arabidopsis/dosage_plotting_input_5kb_chr1_20211108.txt",sep = "\t")
head(arabidopsis.5k)
arabidopsis.100k <- read.csv("/Users/wendy/Desktop/Lab/arabidopsis/dosage_plotting_input_100kb_20211103.txt",sep = "\t")
head(arabidopsis.100k)
arabidopsis.5k.12M <- read.csv("/Users/wendy/Desktop/Lab/arabidopsis/dosage_plotting_input_5kb_chr1_12M-20220114.txt",sep = "\t")
head(arabidopsis.5k.12M)

4 plot 100kb all chromosomes

all.100kb <- ggplot(arabidopsis.100k, aes(Strt,Relative.Read.Coverage,color=Sample)) +
  geom_point(alpha=.6) +
  scale_color_manual(values = c("#999999","#E69F00")) +
  theme(axis.text.x = element_text(size = 8, angle = 90), axis.title.x = element_text(size = 14, margin = margin(t=10)), axis.title.y = element_text(size = 14, margin = margin(r=10)), legend.position = "None") + 
  scale_x_continuous(name="Genomic Position", breaks = NULL) +
  scale_y_continuous(name="Relative read \ncoverage") +
  facet_wrap(~Chrom, nrow = 1, scales = "free_x")
all.100kb

5 save the 100kb plot

ggsave("/Users/wendy/Desktop/Lab/arabidopsis/Arabidopsis_dosage_100kb_20211103.png",width = 10, height = 2)

6 plot 5kb chromosomes 1

all.5kb <- ggplot(arabidopsis.5k, aes(Strt,as.numeric(Relative.Read.Coverage.5k),color=Sample)) +
  geom_point(alpha=.6) +
  scale_color_manual(values = c("#999999","#E69F00")) +
  theme(axis.text.x = element_text(size = 8, angle = 90), axis.title.x = element_text(size = 14, margin = margin(t=10)), axis.title.y = element_text(size = 14, margin = margin(r=10)), legend.position = "None") +
  scale_x_continuous(name = "Chromosome 1", breaks = NULL) +
  scale_y_continuous(name = "Relative Read \nCoverage",limits = c(1,4))
all.5kb
Warning: Removed 129 rows containing missing values (geom_point).

7 save the 5kb plot

ggsave("/Users/wendy/Desktop/Lab/arabidopsis/plots/Arabidopsis_dosage_5kb_chr1_20211108.png",width = 15, height = 2)
Warning: Removed 129 rows containing missing values (geom_point).

8 plot 5kb chromosomes 1 0-1M

all.5kb.12M <- ggplot(arabidopsis.5k.12M, aes(Strt,as.numeric(Relative.Read.Coverage.5k),color=Sample)) +
  geom_point(alpha=.6) +
  scale_color_manual(values = c("#999999","#E69F00")) +
  theme(axis.text.x = element_text(size = 8, angle = 90), axis.title.x = element_text(size = 14, margin = margin(t=10)), axis.title.y = element_text(size = 14, margin = margin(r=10)),legend.position = "bottom") +
  scale_x_continuous(name = "Chromosome 1: 12Mb to 17Mb", breaks = NULL) +
  scale_y_continuous(name = "Relative Read \nCoverage",limits = c(1,4))
all.5kb.12M
Warning: Removed 35 rows containing missing values (geom_point).

8.1 add potential junction data

arabidopsis.anno <- read.csv('/Users/wendy/Desktop/Lab/arabidopsis/crossread-selected-2ctrl-500bp-annotate-20211027.txt', sep = '\t')
head(arabidopsis.anno)

9 change the order for breakpoint1

arabidopsis.anno$Chrom1 <- factor(arabidopsis.anno$Chrom1,levels = c("Chr5","Chr4","Chr3","Chr2","Chr1"))

10 Two dimentional plot

scatplot <- ggplot(arabidopsis.anno, aes(Chrom1_binstart,Chrom2_binstart,alpha=Size,size="1")) +
  geom_point() +
  scale_color_manual(values = c('#000000')) +
  scale_alpha_manual(values = c(0,0.2)) +
  scale_size_manual(values = c(1.5)) +
  theme(legend.position = 'none', axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank() ,axis.title.x = element_text(size = 14, margin = margin(t=10)), axis.title.y = element_text(size = 14,margin = margin(r=10))) +
  scale_x_continuous(name="Breakpoint1") +
  scale_y_continuous(name="Breakpoint2") + 
  facet_grid(rows = vars(arabidopsis.anno$Chrom1), cols = vars(arabidopsis.anno$Chrom2),scales = "free")
scatplot

11 save the scatter plot

ggsave("/Users/wendy/Desktop/Lab/arabidopsis/plots/Arabidopsis_potential_junctions_scatplot_20211103.png",height = 5, width = 5)

12 arragne two plots on one page

onepage <- ggarrange(all.100kb,all.5kb,all.5kb.12M,labels = c("A","B","C"), ncol = 1, nrow = 3, widths = 10, heights = c(3,3,4))
Warning: Removed 129 rows containing missing values (geom_point).
Warning: Removed 35 rows containing missing values (geom_point).
onepage

13 save one page three plots

ggsave("/Users/wendy/Desktop/Lab/arabidopsis/plots/Arabidopsis_dosage_all_chr1_12M_20220114.png",width = 10)
Saving 10 x 7 in image
LS0tCnRpdGxlOiAiQXJhYmlkb3BzaXNfZG9zYWdlX3Bsb3RfcG90ZW50aWFsX2p1bmN0aW9uX3Zpc3VhbGl6YXRpb24iCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgbnVtYmVyX3NlY3Rpb25zOiB5ZXMKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwogIGh0bWxfZG9jdW1lbnQ6CiAgICBkZl9wcmludDogcGFnZWQKICAgIHRvYzogeWVzCi0tLQoKIyBJbnRyb2R1Y3Rpb24gIwpUaGlzIGlzIGEgTm90ZWJvb2sgZm9yIHZpc3VhbGl6aW5nIGRvc2FnZSBwbG90IG9uIEFyYWJpZG9wc2lzIHNhbXBsZSBHYWV0YW4uIApUaGUgaW5wdXQgZGF0YSBpcyBvcmlnaW5hbGx5IGZyb20gYmluLWJ5LXNhbSBhbmFseXNpcy4gRGV0YWlscyBhcmUgYXMgZm9sbG93OgoxLiBCaW4tYnktc2FtLnB5IChjcmVkaXQgdG8gTWVyaWMpLiAzIHNhbXBsZXMgaW5wdXQgaW50byB0aGlzIHB5dGhvbiBzY3JpcHQ6IHRoZSBjaHJvbW9hbmFnZW5lc2lzIHNhbXBsZSwgdHdvIGNvbnRyb2xzIGZyb20gbWl4dHVyZSBvZiBsb3cgY292ZXJhZ2UgV1QgQXJhYmlkb3BzaXMgKGluIG9yZGVyIHRvIGhhdmUgYSBzaW1pbGFyIHNlcXVlbmNlIGRlcHRoIHdpdGggdGhlIGNocm9tb2FuYWdlbmVzaXMgb25lKS4gV2UgdHJpZWQgZGlmZmVyZW50IGJpbiBzaXplcywgZnJvbSAyLjVrYiB0byAxMDBrYiAoMi41LCA1LCAxMCwgMTAwKS4gCjIuIFBsb3Qgb3V0IHJlbGF0aXZlIHJlYWRzIGNvdmVyYWdlIGluIEpNUC4gQnkgdXNpbmcgSk1QLCB3ZSBwbG90IG91dCB0aGUgcmVsYXRpdmUgcmVhZHMgY292ZXJhZ2UgZm9yIHRocmVlIGluZGl2aWR1YWxzLCBhbmQgY29uZmlybSBDTlZzIGF0IHRoZSBiZWdpbm5pbmcgcGFydCBvZiBjaHJvbW9hbmFnZW5lc2lzIHNhbXBsZS4gCjMuIE1vZGlmeSB0aGUgLnR4dCBmaWxlIGZvciB2aXN1YWxpemF0aW9uIGJ5IGdncGxvdC4gVG8gaGF2ZSBhIGNsZWFyIHBsb3QgZm9yIHRoZSByZXZpZXcgcGFwZXIsIHdlIGRlY2lkZSB0byByZW1ha2UgdGhlIGRvc2FnZSBwbG90IHVzaW5nIGdncGxvdC4gU2luY2UgY29udHJvbDEgaXMgdXNpbmcgYXMgdGhlIGJhc2VsaW5lIGluIGJpbi1ieS1zYW0sIHNvIHRoZSByZWxhdGl2ZSByZWFkIGNvdmVyYWdlIGlzIGFsbCAyLCBzbyB3ZSBkZWxldGUgdGhhdCBzYW1wbGVzJyBpbmZvcm1hdGlvbiBpbiB0aGUgaW5wdXQgZmlsZSBhbmQgb25seSBrZWVwIHRoZSBjaHJvbW9hbmFnZW5lc2lzIHNhbXBsZSBhbmQgY29udHJvbDIuIAoKIyBPdXRsaW5lcyAjCjEuIGxvYWQgbGlicmFyaWVzIGFuZCBkYXRhCjIuIAoKIyBJbnB1dCBsaWJyYXJpZXMgYW5kIGRhdGEgIwojIyBhZGQgbGlicmFyaWVzICMjCmBgYHtyfQpsaWJyYXJ5KCJ0aWR5dmVyc2UiKQpsaWJyYXJ5KCJnZ3Bsb3QyIikKbGlicmFyeSgiZ2dwdWJyIikKYGBgCgojIyBhZGQgZGF0YSAjIwpgYGB7cn0KYXJhYmlkb3BzaXMuNWsgPC0gcmVhZC5jc3YoIi9Vc2Vycy93ZW5keS9EZXNrdG9wL0xhYi9hcmFiaWRvcHNpcy9kb3NhZ2VfcGxvdHRpbmdfaW5wdXRfNWtiX2NocjFfMjAyMTExMDgudHh0IixzZXAgPSAiXHQiKQpoZWFkKGFyYWJpZG9wc2lzLjVrKQpgYGAKCmBgYHtyfQphcmFiaWRvcHNpcy4xMDBrIDwtIHJlYWQuY3N2KCIvVXNlcnMvd2VuZHkvRGVza3RvcC9MYWIvYXJhYmlkb3BzaXMvZG9zYWdlX3Bsb3R0aW5nX2lucHV0XzEwMGtiXzIwMjExMTAzLnR4dCIsc2VwID0gIlx0IikKaGVhZChhcmFiaWRvcHNpcy4xMDBrKQpgYGAKCmBgYHtyfQphcmFiaWRvcHNpcy41ay4xMk0gPC0gcmVhZC5jc3YoIi9Vc2Vycy93ZW5keS9EZXNrdG9wL0xhYi9hcmFiaWRvcHNpcy9kb3NhZ2VfcGxvdHRpbmdfaW5wdXRfNWtiX2NocjFfMTJNLTIwMjIwMTE0LnR4dCIsc2VwID0gIlx0IikKaGVhZChhcmFiaWRvcHNpcy41ay4xMk0pCmBgYAoKCiMgcGxvdCAxMDBrYiBhbGwgY2hyb21vc29tZXMgIwpgYGB7cn0KYWxsLjEwMGtiIDwtIGdncGxvdChhcmFiaWRvcHNpcy4xMDBrLCBhZXMoU3RydCxSZWxhdGl2ZS5SZWFkLkNvdmVyYWdlLGNvbG9yPVNhbXBsZSkpICsKICBnZW9tX3BvaW50KGFscGhhPS42KSArCiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcyA9IGMoIiM5OTk5OTkiLCIjRTY5RjAwIikpICsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChzaXplID0gOCwgYW5nbGUgPSA5MCksIGF4aXMudGl0bGUueCA9IGVsZW1lbnRfdGV4dChzaXplID0gMTQsIG1hcmdpbiA9IG1hcmdpbih0PTEwKSksIGF4aXMudGl0bGUueSA9IGVsZW1lbnRfdGV4dChzaXplID0gMTQsIG1hcmdpbiA9IG1hcmdpbihyPTEwKSksIGxlZ2VuZC5wb3NpdGlvbiA9ICJOb25lIikgKyAKICBzY2FsZV94X2NvbnRpbnVvdXMobmFtZT0iR2Vub21pYyBQb3NpdGlvbiIsIGJyZWFrcyA9IE5VTEwpICsKICBzY2FsZV95X2NvbnRpbnVvdXMobmFtZT0iUmVsYXRpdmUgcmVhZCBcbmNvdmVyYWdlIikgKwogIGZhY2V0X3dyYXAofkNocm9tLCBucm93ID0gMSwgc2NhbGVzID0gImZyZWVfeCIpCmFsbC4xMDBrYgpgYGAKCiMgc2F2ZSB0aGUgMTAwa2IgcGxvdCAjIwpgYGB7cn0KZ2dzYXZlKCIvVXNlcnMvd2VuZHkvRGVza3RvcC9MYWIvYXJhYmlkb3BzaXMvQXJhYmlkb3BzaXNfZG9zYWdlXzEwMGtiXzIwMjExMTAzLnBuZyIsd2lkdGggPSAxMCwgaGVpZ2h0ID0gMikKYGBgCgoKIyBwbG90IDVrYiBjaHJvbW9zb21lcyAxICMKYGBge3J9CmFsbC41a2IgPC0gZ2dwbG90KGFyYWJpZG9wc2lzLjVrLCBhZXMoU3RydCxhcy5udW1lcmljKFJlbGF0aXZlLlJlYWQuQ292ZXJhZ2UuNWspLGNvbG9yPVNhbXBsZSkpICsKICBnZW9tX3BvaW50KGFscGhhPS42KSArCiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcyA9IGMoIiM5OTk5OTkiLCIjRTY5RjAwIikpICsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChzaXplID0gOCwgYW5nbGUgPSA5MCksIGF4aXMudGl0bGUueCA9IGVsZW1lbnRfdGV4dChzaXplID0gMTQsIG1hcmdpbiA9IG1hcmdpbih0PTEwKSksIGF4aXMudGl0bGUueSA9IGVsZW1lbnRfdGV4dChzaXplID0gMTQsIG1hcmdpbiA9IG1hcmdpbihyPTEwKSksIGxlZ2VuZC5wb3NpdGlvbiA9ICJOb25lIikgKwogIHNjYWxlX3hfY29udGludW91cyhuYW1lID0gIkNocm9tb3NvbWUgMSIsIGJyZWFrcyA9IE5VTEwpICsKICBzY2FsZV95X2NvbnRpbnVvdXMobmFtZSA9ICJSZWxhdGl2ZSBSZWFkIFxuQ292ZXJhZ2UiLGxpbWl0cyA9IGMoMSw0KSkKYWxsLjVrYgpgYGAKCiMgc2F2ZSB0aGUgNWtiIHBsb3QgIyMKYGBge3J9Cmdnc2F2ZSgiL1VzZXJzL3dlbmR5L0Rlc2t0b3AvTGFiL2FyYWJpZG9wc2lzL3Bsb3RzL0FyYWJpZG9wc2lzX2Rvc2FnZV81a2JfY2hyMV8yMDIxMTEwOC5wbmciLHdpZHRoID0gMTUsIGhlaWdodCA9IDIpCmBgYAoKCiMgcGxvdCA1a2IgY2hyb21vc29tZXMgMSAwLTFNICMKYGBge3J9CmFsbC41a2IuMTJNIDwtIGdncGxvdChhcmFiaWRvcHNpcy41ay4xMk0sIGFlcyhTdHJ0LGFzLm51bWVyaWMoUmVsYXRpdmUuUmVhZC5Db3ZlcmFnZS41ayksY29sb3I9U2FtcGxlKSkgKwogIGdlb21fcG9pbnQoYWxwaGE9LjYpICsKICBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzID0gYygiIzk5OTk5OSIsIiNFNjlGMDAiKSkgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KHNpemUgPSA4LCBhbmdsZSA9IDkwKSwgYXhpcy50aXRsZS54ID0gZWxlbWVudF90ZXh0KHNpemUgPSAxNCwgbWFyZ2luID0gbWFyZ2luKHQ9MTApKSwgYXhpcy50aXRsZS55ID0gZWxlbWVudF90ZXh0KHNpemUgPSAxNCwgbWFyZ2luID0gbWFyZ2luKHI9MTApKSxsZWdlbmQucG9zaXRpb24gPSAiYm90dG9tIikgKwogIHNjYWxlX3hfY29udGludW91cyhuYW1lID0gIkNocm9tb3NvbWUgMTogMTJNYiB0byAxN01iIiwgYnJlYWtzID0gTlVMTCkgKwogIHNjYWxlX3lfY29udGludW91cyhuYW1lID0gIlJlbGF0aXZlIFJlYWQgXG5Db3ZlcmFnZSIsbGltaXRzID0gYygxLDQpKQphbGwuNWtiLjEyTQpgYGAKCiMjIGFkZCBwb3RlbnRpYWwganVuY3Rpb24gZGF0YSAjIwpgYGB7cn0KYXJhYmlkb3BzaXMuYW5ubyA8LSByZWFkLmNzdignL1VzZXJzL3dlbmR5L0Rlc2t0b3AvTGFiL2FyYWJpZG9wc2lzL2Nyb3NzcmVhZC1zZWxlY3RlZC0yY3RybC01MDBicC1hbm5vdGF0ZS0yMDIxMTAyNy50eHQnLCBzZXAgPSAnXHQnKQpoZWFkKGFyYWJpZG9wc2lzLmFubm8pCmBgYAoKIyBjaGFuZ2UgdGhlIG9yZGVyIGZvciBicmVha3BvaW50MSAjCmBgYHtyfQphcmFiaWRvcHNpcy5hbm5vJENocm9tMSA8LSBmYWN0b3IoYXJhYmlkb3BzaXMuYW5ubyRDaHJvbTEsbGV2ZWxzID0gYygiQ2hyNSIsIkNocjQiLCJDaHIzIiwiQ2hyMiIsIkNocjEiKSkKYGBgCgojIFR3byBkaW1lbnRpb25hbCBwbG90ICMKYGBge3J9CnNjYXRwbG90IDwtIGdncGxvdChhcmFiaWRvcHNpcy5hbm5vLCBhZXMoQ2hyb20xX2JpbnN0YXJ0LENocm9tMl9iaW5zdGFydCxhbHBoYT1TaXplLHNpemU9IjEiKSkgKwogIGdlb21fcG9pbnQoKSArCiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcyA9IGMoJyMwMDAwMDAnKSkgKwogIHNjYWxlX2FscGhhX21hbnVhbCh2YWx1ZXMgPSBjKDAsMC4yKSkgKwogIHNjYWxlX3NpemVfbWFudWFsKHZhbHVlcyA9IGMoMS41KSkgKwogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICdub25lJywgYXhpcy50ZXh0LnggPSBlbGVtZW50X2JsYW5rKCksIGF4aXMudGV4dC55ID0gZWxlbWVudF9ibGFuaygpLCBheGlzLnRpY2tzID0gZWxlbWVudF9ibGFuaygpICxheGlzLnRpdGxlLnggPSBlbGVtZW50X3RleHQoc2l6ZSA9IDE0LCBtYXJnaW4gPSBtYXJnaW4odD0xMCkpLCBheGlzLnRpdGxlLnkgPSBlbGVtZW50X3RleHQoc2l6ZSA9IDE0LG1hcmdpbiA9IG1hcmdpbihyPTEwKSkpICsKICBzY2FsZV94X2NvbnRpbnVvdXMobmFtZT0iQnJlYWtwb2ludDEiKSArCiAgc2NhbGVfeV9jb250aW51b3VzKG5hbWU9IkJyZWFrcG9pbnQyIikgKyAKICBmYWNldF9ncmlkKHJvd3MgPSB2YXJzKGFyYWJpZG9wc2lzLmFubm8kQ2hyb20xKSwgY29scyA9IHZhcnMoYXJhYmlkb3BzaXMuYW5ubyRDaHJvbTIpLHNjYWxlcyA9ICJmcmVlIikKc2NhdHBsb3QKYGBgCgojIHNhdmUgdGhlIHNjYXR0ZXIgcGxvdCAjIwpgYGB7cn0KZ2dzYXZlKCIvVXNlcnMvd2VuZHkvRGVza3RvcC9MYWIvYXJhYmlkb3BzaXMvcGxvdHMvQXJhYmlkb3BzaXNfcG90ZW50aWFsX2p1bmN0aW9uc19zY2F0cGxvdF8yMDIxMTEwMy5wbmciLGhlaWdodCA9IDUsIHdpZHRoID0gNSkKYGBgCgojIGFycmFnbmUgdHdvIHBsb3RzIG9uIG9uZSBwYWdlICMKYGBge3J9Cm9uZXBhZ2UgPC0gZ2dhcnJhbmdlKGFsbC4xMDBrYixhbGwuNWtiLGFsbC41a2IuMTJNLGxhYmVscyA9IGMoIkEiLCJCIiwiQyIpLCBuY29sID0gMSwgbnJvdyA9IDMsIHdpZHRocyA9IDEwLCBoZWlnaHRzID0gYygzLDMsNCkpCm9uZXBhZ2UKYGBgCgojIHNhdmUgb25lIHBhZ2UgdGhyZWUgcGxvdHMgIwpgYGB7cn0KZ2dzYXZlKCIvVXNlcnMvd2VuZHkvRGVza3RvcC9MYWIvYXJhYmlkb3BzaXMvcGxvdHMvQXJhYmlkb3BzaXNfZG9zYWdlX2FsbF9jaHIxXzEyTV8yMDIyMDExNC5wbmciLHdpZHRoID0gMTApCmBgYAoKCgoKCg==